package com.chuangxin.data.kid3;

import com.chuangxin.data.core.WebPageFetcher;
import com.chuangxin.data.core.io.FileWriter;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import java.io.File;

/**
 * Page fetcher for the site rooted at {@code http://www.3kid.net}.
 * <p>
 * Constructing an instance queues the start URL and immediately calls {@code execute()};
 * every parsed page handed to {@code processDoc} is reduced to a title/content pair.
 * <p>
 * Created by Dawnwords on 2015/2/2.
 */
public class Kid3Fetcher extends WebPageFetcher {

    /** Prefix of every output location: the {@code kid3} directory on drive {@code E:}. */
    private static final String OUTPUT_ROOT = "E:" + File.separator + "kid3" + File.separator;

    /** Site root that is prepended to the start path and to every followed link. */
    protected String BASE = "http://www.3kid.net";

    /**
     * Builds the fetcher, queues {@code BASE + start} and starts the run right away.
     *
     * @param start  path appended to {@code BASE} to form the first URL
     * @param outDir name appended to the {@code E:/kid3/} root and handed to the writer
     */
    public Kid3Fetcher(String start, String outDir) {
        // NOTE(review): the meaning of the 0.5 argument is defined by WebPageFetcher — confirm there.
        super(0.5, new FileWriter(OUTPUT_ROOT + outDir));
        putURL(BASE + start);
        execute();
    }

    /**
     * Queues the link found under {@code div#pnr} (if any) and extracts the page's
     * title and intro markup.
     *
     * @param doc parsed page
     * @param url address of the page (not used by this implementation)
     * @return the inner HTML of the first {@code span.a_title_m} as title, and the inner
     *         HTML of {@code div#flash > div.intro} with its {@code h3} elements removed
     *         as content
     */
    @Override
    protected TitleContent processDoc(Document doc, String url) {
        // Presumably the "next page" anchor — only the first match is followed.
        Elements pnrAnchors = doc.select("div#pnr > a");
        if (!pnrAnchors.isEmpty()) {
            String href = pnrAnchors.first().attr("href");
            putURL(BASE + href);
        }

        Elements titleSpans = doc.select("span.a_title_m");
        String title = titleSpans.eq(0).html();

        // Strip the headings from the intro block before serialising it.
        Elements intro = doc.select("div#flash > div.intro");
        intro.select("h3").remove();

        return new TitleContent(title, intro.html());
    }

}
